import geopandas as gpd
import osmnx
import rioxarray
import tobler
import xarray as xr
import xvec
from libpysal import graph
from sklearn import neighborsCreating new data
The contents will appear later
This section will contain hands-on material that will appear later.
Map matching
Nearest join
simd = gpd.read_file("data/edinburgh_simd_2020.gpkg")
simd.geometry.explore()Make this Notebook Trusted to load map: File -> Trust Notebook
extent = simd.to_crs(4326).unary_union
pois = osmnx.features_from_polygon(extent, tags={"amenity": ["restaurant", "bar"]})
pois.head(2)| addr:city | addr:housenumber | addr:postcode | addr:street | amenity | cuisine | name | phone | source | wheelchair | ... | contact:tripadvisor | level:ref | operator:wikidata | alt_name:zh | nodes | building | building:levels | building:material | roof:levels | roof:shape | ||
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| element_type | osmid | |||||||||||||||||||||
| node | 21042679 | Edinburgh | 41 | EH8 9NZ | South Clerk Street | restaurant | chinese | Happy Hot Pot | +44 131 667 6337 | survey | no | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
| 29173148 | Edinburgh | NaN | EH4 6DY | Queensferry Road | restaurant | steak | Miller & Carter | +44 131 339 4350 | survey | NaN | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
2 rows × 144 columns
pois = pois.reset_index()[
[
"osmid",
"amenity",
"name",
"geometry",
]
].to_crs(simd.crs)
pois.head(2)| osmid | amenity | name | geometry | |
|---|---|---|---|---|
| 0 | 21042679 | restaurant | Happy Hot Pot | POINT (326363.790 672581.659) |
| 1 | 29173148 | restaurant | Miller & Carter | POINT (317864.915 675479.475) |
pois.explore("amenity", tiles="CartoDB Positron")/home/runner/micromamba/envs/sds/lib/python3.11/site-packages/geopandas/explore.py:400: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
elif pd.api.types.is_categorical_dtype(gdf[column]):
Make this Notebook Trusted to load map: File -> Trust Notebook
airbnb = gpd.read_file("data/edinburgh_airbnb_2023.gpkg")airbnb.price.head()0 $126.00
1 $269.00
2 $95.00
3 $172.00
4 $361.00
Name: price, dtype: object
airbnb["price_float"] = airbnb.price.str.strip("$").str.replace(",", "").astype(float)two_bed_homes = airbnb[
(airbnb["bedrooms"] == 2)
& (airbnb["property_type"] == "Entire rental unit")
& (airbnb["price_float"] < 300)
].copy()
two_bed_homes.head()| id | bedrooms | property_type | price | geometry | price_float | |
|---|---|---|---|---|---|---|
| 3 | 821573 | 2.0 | Entire rental unit | $172.00 | POINT (326748.646 674001.683) | 172.0 |
| 5 | 834777 | 2.0 | Entire rental unit | $264.00 | POINT (324950.724 673875.598) | 264.0 |
| 6 | 450745 | 2.0 | Entire rental unit | $177.00 | POINT (326493.725 672853.904) | 177.0 |
| 10 | 485856 | 2.0 | Entire rental unit | $157.00 | POINT (326597.124 673869.551) | 157.0 |
| 17 | 51505 | 2.0 | Entire rental unit | $155.00 | POINT (325393.807 674177.409) | 155.0 |
airbnb_pubs = two_bed_homes.sjoin_nearest(pois, how="left", max_distance=100)
airbnb_pubs.head()| id | bedrooms | property_type | price | geometry | price_float | index_right | osmid | amenity | name | |
|---|---|---|---|---|---|---|---|---|---|---|
| 3 | 821573 | 2.0 | Entire rental unit | $172.00 | POINT (326748.646 674001.683) | 172.0 | NaN | NaN | NaN | NaN |
| 5 | 834777 | 2.0 | Entire rental unit | $264.00 | POINT (324950.724 673875.598) | 264.0 | 658.0 | 4.114822e+09 | bar | Badger & Co |
| 6 | 450745 | 2.0 | Entire rental unit | $177.00 | POINT (326493.725 672853.904) | 177.0 | 32.0 | 5.043315e+08 | restaurant | Blonde |
| 10 | 485856 | 2.0 | Entire rental unit | $157.00 | POINT (326597.124 673869.551) | 157.0 | 550.0 | 3.071953e+09 | restaurant | Oink Hog Roast |
| 17 | 51505 | 2.0 | Entire rental unit | $155.00 | POINT (325393.807 674177.409) | 155.0 | 589.0 | 3.274256e+09 | bar | Bramble |
airbnb_pubs_unlimited = two_bed_homes.sjoin_nearest(
pois, how="left", max_distance=None, distance_col="distance"
)
airbnb_pubs_unlimited.head()| id | bedrooms | property_type | price | geometry | price_float | index_right | osmid | amenity | name | distance | |
|---|---|---|---|---|---|---|---|---|---|---|---|
| 3 | 821573 | 2.0 | Entire rental unit | $172.00 | POINT (326748.646 674001.683) | 172.0 | 692 | 4758881738 | restaurant | The Holyrood Room Restaurant | 193.470980 |
| 5 | 834777 | 2.0 | Entire rental unit | $264.00 | POINT (324950.724 673875.598) | 264.0 | 658 | 4114821564 | bar | Badger & Co | 13.631934 |
| 6 | 450745 | 2.0 | Entire rental unit | $177.00 | POINT (326493.725 672853.904) | 177.0 | 32 | 504331543 | restaurant | Blonde | 88.364090 |
| 10 | 485856 | 2.0 | Entire rental unit | $157.00 | POINT (326597.124 673869.551) | 157.0 | 550 | 3071953161 | restaurant | Oink Hog Roast | 24.399283 |
| 17 | 51505 | 2.0 | Entire rental unit | $155.00 | POINT (325393.807 674177.409) | 155.0 | 589 | 3274255605 | bar | Bramble | 72.692519 |
airbnb_pubs_unlimited.explore("distance", tiles="CartoDB Positron", scheme="naturalbreaks")/home/runner/micromamba/envs/sds/lib/python3.11/site-packages/geopandas/explore.py:400: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
elif pd.api.types.is_categorical_dtype(gdf[column]):
Make this Notebook Trusted to load map: File -> Trust Notebook
Count nearby features
pois_buffered = pois.set_geometry(pois.buffer(500))
pois_buffered.head()| osmid | amenity | name | geometry | |
|---|---|---|---|---|
| 0 | 21042679 | restaurant | Happy Hot Pot | POLYGON ((326863.790 672581.659, 326861.382 67... |
| 1 | 29173148 | restaurant | Miller & Carter | POLYGON ((318364.915 675479.475, 318362.507 67... |
| 2 | 44551391 | restaurant | Ciao Roma | POLYGON ((326531.916 673348.014, 326529.509 67... |
| 3 | 50503871 | restaurant | Urban Angel | POLYGON ((325818.638 674176.293, 325816.231 67... |
| 4 | 50503874 | restaurant | La Garrigue | POLYGON ((326593.951 673765.317, 326591.544 67... |
within_buffer = two_bed_homes.sjoin(pois_buffered)counts = within_buffer.groupby("id")["osmid"].count()
counts.head()id
51505 110
97472 9
118314 2
163870 29
216245 47
Name: osmid, dtype: int64
airbnb_w_count = two_bed_homes.merge(counts, on="id")
airbnb_w_count.head()| id | bedrooms | property_type | price | geometry | price_float | osmid | |
|---|---|---|---|---|---|---|---|
| 0 | 821573 | 2.0 | Entire rental unit | $172.00 | POINT (326748.646 674001.683) | 172.0 | 7 |
| 1 | 834777 | 2.0 | Entire rental unit | $264.00 | POINT (324950.724 673875.598) | 264.0 | 120 |
| 2 | 450745 | 2.0 | Entire rental unit | $177.00 | POINT (326493.725 672853.904) | 177.0 | 47 |
| 3 | 485856 | 2.0 | Entire rental unit | $157.00 | POINT (326597.124 673869.551) | 157.0 | 17 |
| 4 | 51505 | 2.0 | Entire rental unit | $155.00 | POINT (325393.807 674177.409) | 155.0 | 110 |
airbnb_w_count.explore("osmid", tiles="CartoDB Positron")/home/runner/micromamba/envs/sds/lib/python3.11/site-packages/geopandas/explore.py:400: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
elif pd.api.types.is_categorical_dtype(gdf[column]):
Make this Notebook Trusted to load map: File -> Trust Notebook
Extract from a raster
population = rioxarray.open_rasterio("data/edinburgh_population.tif", masked=True).squeeze()
population<xarray.DataArray (y: 221, x: 236)>
[52156 values with dtype=float64]
Coordinates:
band int64 1
* x (x) float64 3.096e+05 3.097e+05 ... 3.329e+05 3.33e+05
* y (y) float64 6.809e+05 6.808e+05 ... 6.591e+05 6.59e+05
spatial_ref int64 0
Attributes:
AREA_OR_POINT: Area
scale_factor: 1.0
add_offset: 0.0population.plot()<matplotlib.collections.QuadMesh at 0x7fce22dde050>

pop_cube = population.drop_vars(["band", "spatial_ref"]).xvec.extract_points(
two_bed_homes.geometry, "x", "y"
)
pop_cube.name = "population"
pop_cube<xarray.DataArray 'population' (geometry: 1212)>
[1212 values with dtype=float64]
Coordinates:
* geometry (geometry) object POINT (326748.64563561964 674001.6832108775) ...
index (geometry) int64 3 5 6 10 17 19 ... 7669 7671 7672 7677 7680 7684
Indexes:
geometry GeometryIndex (crs=EPSG:27700)
Attributes:
AREA_OR_POINT: Area
scale_factor: 1.0
add_offset: 0.0pop = pop_cube.xvec.to_geodataframe()
pop.head()| geometry | index | population | |
|---|---|---|---|
| 0 | POINT (326748.646 674001.683) | 3 | 2.000207 |
| 1 | POINT (324950.724 673875.598) | 5 | 15.106922 |
| 2 | POINT (326493.725 672853.904) | 6 | 176.059799 |
| 3 | POINT (326597.124 673869.551) | 10 | 15.964920 |
| 4 | POINT (325393.807 674177.409) | 17 | 17.008703 |
pop.explore("population", tiles="CartoDB Positron")/home/runner/micromamba/envs/sds/lib/python3.11/site-packages/geopandas/explore.py:400: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
elif pd.api.types.is_categorical_dtype(gdf[column]):
Make this Notebook Trusted to load map: File -> Trust Notebook
two_bed_homes["population"] = pop.populationArea interpolation
simd = gpd.read_file("data/edinburgh_simd_2020.gpkg")grid = tobler.util.h3fy(simd, resolution=8)
grid.head(2)/home/runner/micromamba/envs/sds/lib/python3.11/site-packages/pyproj/crs/crs.py:1293: UserWarning: You will likely lose important projection information when converting to a PROJ string from another format. See: https://proj.org/faq.html#what-is-the-best-format-for-describing-coordinate-reference-systems
proj = self._crs.to_proj4(version=version)
| geometry | |
|---|---|
| hex_id | |
| 8819727587fffff | POLYGON ((315204.837 665386.180, 314740.392 66... |
| 881972675bfffff | POLYGON ((315051.574 661661.196, 314587.029 66... |
ax = simd.boundary.plot(linewidth=.5)
grid.boundary.plot(ax=ax, color="red", linewidth=.5)<Axes: >

interpolated = tobler.area_weighted.area_interpolate(
simd,
grid,
extensive_variables=["EmpNumDep", "IncNumDep"],
intensive_variables=["EmpRate", "IncRate"],
)interpolated.explore("EmpRate", tiles="CartoDB Positron")/home/runner/micromamba/envs/sds/lib/python3.11/site-packages/geopandas/explore.py:400: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
elif pd.api.types.is_categorical_dtype(gdf[column]):
Make this Notebook Trusted to load map: File -> Trust Notebook
simd[["EmpRate", "geometry"]].explore("EmpRate", tiles="CartoDB Positron")/home/runner/micromamba/envs/sds/lib/python3.11/site-packages/geopandas/explore.py:400: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
elif pd.api.types.is_categorical_dtype(gdf[column]):
Make this Notebook Trusted to load map: File -> Trust Notebook
Point interpolation
two_bed_homes.explore("price_float", tiles="CartoDB Positron")/home/runner/micromamba/envs/sds/lib/python3.11/site-packages/geopandas/explore.py:400: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
elif pd.api.types.is_categorical_dtype(gdf[column]):
Make this Notebook Trusted to load map: File -> Trust Notebook
interrpolation_model = neighbors.KNeighborsRegressor(n_neighbors=10, weights="distance")coords = two_bed_homes.get_coordinates()
coords.head()| x | y | |
|---|---|---|
| 3 | 326748.645636 | 674001.683211 |
| 5 | 324950.723888 | 673875.598033 |
| 6 | 326493.725178 | 672853.903917 |
| 10 | 326597.123684 | 673869.551295 |
| 17 | 325393.807222 | 674177.408961 |
interrpolation_model.fit(
coords, two_bed_homes.price_float
)KNeighborsRegressor(n_neighbors=10, weights='distance')In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
KNeighborsRegressor(n_neighbors=10, weights='distance')
grid_coordinates = grid.centroid.get_coordinates()
grid_coordinates.head()| x | y | |
|---|---|---|
| hex_id | ||
| 8819727587fffff | 315127.615117 | 664904.741910 |
| 881972675bfffff | 314974.336174 | 661179.542360 |
| 8819727627fffff | 320461.899805 | 673264.202086 |
| 8819723927fffff | 326416.235455 | 671673.366871 |
| 8819727569fffff | 312570.031896 | 673867.901209 |
price_on_grid = interrpolation_model.predict(grid_coordinates)grid["price"] = price_on_gridgrid.explore("price", tiles="CartoDB Positron")/home/runner/micromamba/envs/sds/lib/python3.11/site-packages/geopandas/explore.py:400: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
elif pd.api.types.is_categorical_dtype(gdf[column]):
Make this Notebook Trusted to load map: File -> Trust Notebook
Tip
https://docs.scipy.org/doc/scipy/reference/generated/scipy.interpolate.griddata.html
https://pyinterpolate.readthedocs.io/en/stable/index.html
text here
Map synthesis
Count neighbours
distance_500 = graph.Graph.build_distance_band(two_bed_homes, 500)distance_500.cardinalities.head()focal
3 59
5 67
6 35
10 70
17 66
Name: cardinalities, dtype: int64
two_bed_homes["within 500m"] = distance_500.cardinalitiestwo_bed_homes.explore("within 500m", tiles="CartoDB Positron")/home/runner/micromamba/envs/sds/lib/python3.11/site-packages/geopandas/explore.py:400: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
elif pd.api.types.is_categorical_dtype(gdf[column]):
Make this Notebook Trusted to load map: File -> Trust Notebook
Lagged variables
distance_500_row = distance_500.transform("r")two_bed_homes["price_lag"] = distance_500_row.lag(two_bed_homes.price_float)two_bed_homes.explore("price_lag", tiles="CartoDB Positron")/home/runner/micromamba/envs/sds/lib/python3.11/site-packages/geopandas/explore.py:400: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
elif pd.api.types.is_categorical_dtype(gdf[column]):
Make this Notebook Trusted to load map: File -> Trust Notebook
distance_500_row[3].head()neighbor
10 0.016949
150 0.016949
232 0.016949
333 0.016949
528 0.016949
Name: weight, dtype: float64
distance_500_gaussian = graph.Graph.build_distance_band(two_bed_homes, 500, binary=False)/home/runner/micromamba/envs/sds/lib/python3.11/site-packages/libpysal/graph/base.py:656: RuntimeWarning: divide by zero encountered in power
kernel=lambda distances, alpha: np.power(distances, alpha),
distance_500_gaussian[3].head()neighbor
10 0.004974
150 0.002261
232 0.005490
333 0.002426
528 0.002451
Name: weight, dtype: float64
distance_500_gaussian.transform("r")[3].head()neighbor
10 0.028768
150 0.013077
232 0.031754
333 0.014028
528 0.014177
Name: weight, dtype: float64
two_bed_homes["price_lag_w"] = distance_500_gaussian.transform("r").lag(
two_bed_homes.price_float
)two_bed_homes.head()| id | bedrooms | property_type | price | geometry | price_float | population | within 500m | price_lag | price_lag_w | |
|---|---|---|---|---|---|---|---|---|---|---|
| 3 | 821573 | 2.0 | Entire rental unit | $172.00 | POINT (326748.646 674001.683) | 172.0 | 15.964920 | 59 | 185.423729 | 189.633253 |
| 5 | 834777 | 2.0 | Entire rental unit | $264.00 | POINT (324950.724 673875.598) | 264.0 | 56.425114 | 67 | 177.119403 | 164.360323 |
| 6 | 450745 | 2.0 | Entire rental unit | $177.00 | POINT (326493.725 672853.904) | 177.0 | 140.532318 | 35 | 141.171429 | 136.599523 |
| 10 | 485856 | 2.0 | Entire rental unit | $157.00 | POINT (326597.124 673869.551) | 157.0 | 299.309570 | 70 | 190.785714 | 197.843603 |
| 17 | 51505 | 2.0 | Entire rental unit | $155.00 | POINT (325393.807 674177.409) | 155.0 | 57.981670 | 66 | 206.015152 | 209.656652 |